import datetime
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import pandas as pd
import numpy as np


# Endpoint that returns one month of historical weather as an HTML table.
url = "https://tianqi.2345.com/Pc/GetHistory"


def crawl_html(year, month, id):
    """Fetch one month of historical weather for an area as a DataFrame.

    Args:
        year: four-digit year to query.
        month: month number (1-12).
        id: 2345.com area id (see getCityid).

    Returns:
        pandas.DataFrame parsed from the first HTML table in the response.

    Raises:
        requests.HTTPError: if the server responds with an error status.
    """
    from io import StringIO

    # BUG FIX: the original passed {id} — a one-element *set* literal — as the
    # parameter value; pass the scalar so it encodes as areaInfo[areaId]=<id>.
    params = {
        'areaInfo[areaId]': id,
        'areaInfo[areaType]': 2,
        'date[year]': year,
        'date[month]': month,
    }

    headers = {'User-Agent': '''Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'''}

    response = requests.get(url, headers=headers, params=params, timeout=10)
    response.raise_for_status()  # fail loudly instead of a confusing JSON/parse error
    data = response.json()["data"]  # HTML fragment containing the month's table
    # Wrap in StringIO: passing literal HTML to read_html is deprecated (pandas >= 2.1).
    df = pd.read_html(StringIO(data))[0]
    return df

# Look up a city's area id
def getCityid(cityname):
    """Resolve a Chinese city name to its 2345.com area id.

    Downloads the site's city-selector JS (GBK encoded) and extracts
    (code, name) pairs for prefecture-level-and-above cities.

    Args:
        cityname: city name exactly as it appears on the site, e.g. "北京".

    Returns:
        The area id string for the city.

    Raises:
        KeyError: if the city name is not found in the list.
        requests.HTTPError / requests.Timeout: on network failure.
    """
    resp = requests.get('https://tianqi.2345.com/js/citySelectData.js', timeout=10)
    text = resp.content.decode('gbk')
    # Raw string fixes the invalid '\-' escape in the original pattern.
    # Codes starting 1-5 are prefecture-level and above; county-level codes start higher.
    city = re.findall(r'([1-5]\d{4})\-[A-Z]\s(.*?)\-\d{5}', text)
    city = list(set(city))  # drop duplicate entries
    print('城市列表获取成功')
    # Build name -> id directly. The original swapped two list elements and
    # built/inverted an intermediate dict — a no-op given set() already
    # discards order — so collapse it to one comprehension.
    cityid_dict = {name: code for code, name in city}
    cityid = cityid_dict[cityname]
    print(cityid)
    return cityid

# Fetch historical weather over an arbitrary span of years
def gethisda(cityname, year1):
    """Crawl historical weather for a city from year1 up to the current month.

    Args:
        cityname: city name, e.g. "北京" (resolved to an area id via getCityid).
        year1: first year (inclusive) to crawl.

    Returns:
        One pandas.DataFrame with all months concatenated, index reset.
    """
    df_list = []
    today = datetime.date.today()
    area_id = getCityid(cityname)
    for year in range(year1, today.year + 1):
        print("爬取：%d年" % (year))
        # BUG FIX: the original always requested months 1-12, which for the
        # current year asks the server for months that haven't happened yet.
        last_month = today.month if year == today.year else 12
        for month in range(1, last_month + 1):
            df = crawl_html(year, month, area_id)
            df_list.append(df)
    result_df = pd.concat(df_list, ignore_index=True)
    return result_df

""" # 测试
city_data = gethisda("北京")
city_data.to_csv("北京天气.csv", index=False) """